Visualizations in R

Libraries

Unnecessary staff

load data

data <- read.csv("owid-covid-data.csv", stringsAsFactors = F)
data$date <- as.Date(data$date, "%Y-%m-%d")
datatable(head(data), caption = "Table 1: This is a simple caption for the table.", 
    class = "cell-border stripe", options = list(pageLength = 5))
temp <- data.frame(data) %>% select(continent, location, gdp_per_capita, population_density, 
    total_cases, population)
temp <- subset(temp, location != "World" & location != "International")
data_agg <- aggregate(temp, list(temp$location, temp$continent, temp$gdp_per_capita, 
    temp$population_density), max)
data_agg$Group.1 <- NULL
data_agg$Group.2 <- NULL
data_agg$Group.3 <- NULL
data_agg$Group.4 <- NULL

head(data_agg)
      continent  location gdp_per_capita population_density total_cases
1          Asia  Mongolia      11840.846              1.980        1517
2        Africa   Namibia       9541.808              3.078       30198
3       Oceania Australia      44648.710              3.202       28708
4        Europe   Iceland      46482.958              3.404          NA
5 South America  Suriname      13767.119              3.612        7469
6        Africa     Libya      17881.509              3.623          NA
  population
1    3278292
2    2540916
3   25499881
4     341250
5     586634
6    6871287
data_agg[data_agg$location == "Zimbabwe", ]
   continent location gdp_per_capita population_density total_cases population
51    Africa Zimbabwe       1899.775             42.729       26881   14862927
data_sel <- data.frame(data) %>% select(continent, location, date, new_cases, new_deaths, 
    total_cases, total_deaths, population)
head(data_sel)
  continent    location       date new_cases new_deaths total_cases
1      Asia Afghanistan 2020-02-24         1         NA           1
2      Asia Afghanistan 2020-02-25         0         NA           1
3      Asia Afghanistan 2020-02-26         0         NA           1
4      Asia Afghanistan 2020-02-27         0         NA           1
5      Asia Afghanistan 2020-02-28         0         NA           1
6      Asia Afghanistan 2020-02-29         0         NA           1
  total_deaths population
1           NA   38928341
2           NA   38928341
3           NA   38928341
4           NA   38928341
5           NA   38928341
6           NA   38928341
library(plotly)

a <- ggplot(data_agg) + geom_point(aes(x = total_cases, y = population, color = continent, 
    size = gdp_per_capita)) + labs(x = "Total Cases", y = "Population", title = "Covid total cases vs population", 
    caption = "source: ") + theme(panel.background = element_rect(fill = "grey", 
    color = "white", size = 1.2), plot.background = element_rect(fill = "grey"), 
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold", color = "brown"), 
    plot.caption = element_text(face = "italic", color = "brown"), panel.grid.major = element_line(linetype = "dashed", 
        size = 0.1), panel.grid.minor = element_blank()) + scale_x_continuous(trans = "log10") + 
    scale_y_continuous(trans = "log10")

ggplotly(a)
ggplot(data=data_agg, aes(x= total_cases, y = continent, fill=continent)) +
  geom_bar(width = 0.5, stat = 'identity') +
  labs(title = 'Total cases by continent.',
       subtitle = 'subtitle') + 
  xlab('Total Cases') +
  ylab('Continents') +
  theme(legend.position = "none",
        text = element_text(size=8),
        axis.text.y = element_text(hjust=1)) +
  geom_text(aes(label = total_cases), vjust =0.5, hjust=-0.03, 
            color = 'darkgreen', size = 3.5) +# since the bars are horizontal I put
  # the labels next to the bars
  theme(plot.title = element_text(size =12, face='bold', 
                                  color = 'brown'))

ggplot(data = data_agg, aes(x = total_cases)) + geom_bar(stat = "count", fill = "darkgreen", 
    color = "darkgreen", aes(y = ..count..)) + theme_minimal() + geom_text(aes(label = ..count..), 
    stat = "count", vjust = -0.4)

ggplot(iris, aes(x = Sepal.Width, fill = Species)) + geom_histogram(data = iris[, 
    -5], fill = "grey", alpha = 0.5, bins = 30) + geom_histogram(colour = "black", 
    bins = 30) + facet_wrap(~Species) + guides(fill = FALSE)

ggplot(data_agg, aes(x = gdp_per_capita, fill = continent)) + geom_histogram(data = data_agg[, 
    -5], fill = "grey", alpha = 0.5, bins = 30) + geom_histogram(colour = "black", 
    bins = 30) + facet_wrap(~continent) + guides(fill = FALSE)

# ggplotly(b)
# library(gganimate) library(gapminder) start_pause <- 0 end_pause <- 0
# ggplot(gapminder, aes(gdpPercap, lifeExp, size = pop, colour = country)) +
# geom_point(alpha = 0.7, show.legend = FALSE) + scale_colour_manual(values =
# country_colors) + scale_size(range = c(2, 12)) + scale_x_log10() +
# facet_wrap(~continent) + # Here comes the gganimate specific bits labs(title =
# 'Year: {frame_time}', x = 'GDP per capita', y = 'life expectancy') +
# transition_time(year) + ease_aes('linear')
head(data_sel)
  continent    location       date new_cases new_deaths total_cases
1      Asia Afghanistan 2020-02-24         1         NA           1
2      Asia Afghanistan 2020-02-25         0         NA           1
3      Asia Afghanistan 2020-02-26         0         NA           1
4      Asia Afghanistan 2020-02-27         0         NA           1
5      Asia Afghanistan 2020-02-28         0         NA           1
6      Asia Afghanistan 2020-02-29         0         NA           1
  total_deaths population
1           NA   38928341
2           NA   38928341
3           NA   38928341
4           NA   38928341
5           NA   38928341
6           NA   38928341
data_anim <- data_sel %>% filter(location %in% c("Germany", "Canada"))
head(data_anim)
      continent location       date new_cases new_deaths total_cases
1 North America   Canada 2020-01-26         1         NA           1
2 North America   Canada 2020-01-27         0         NA           1
3 North America   Canada 2020-01-28         1         NA           2
4 North America   Canada 2020-01-29         0         NA           2
5 North America   Canada 2020-01-30         0         NA           2
6 North America   Canada 2020-01-31         2         NA           4
  total_deaths population
1           NA   37742157
2           NA   37742157
3           NA   37742157
4           NA   37742157
5           NA   37742157
6           NA   37742157
# data_anim <- data_sel %>% filter(location %in% c('Germany','Canada', 'Mexico',
# 'Italy', 'Spain', 'Poland'))

data_anim <- data_sel %>% filter(continent %in% c("africa", "Asia", "Europe", "North America", 
    "South America", "Oceania"))
data_anim <- data_anim[complete.cases(data_anim), ]
nice <- ggplot(data_anim, aes(total_cases, total_deaths, size = population, colour = continent)) + 
    geom_point(alpha = 0.7, show.legend = FALSE) + scale_size(range = c(2, 12)) + 
    scale_x_log10() + facet_wrap(~continent) + # Here comes the gganimate specific bits
labs(title = "Year: {frame_time}", x = "Total Cases", y = "Total Deaths") + transition_time(date) + 
    ease_aes("linear")

anim_save("nice.gif", nice)

for maps:

library(tidyverse)
library(rnaturalearth)
library(cowplot)
library(sf)
library(ggmap)
library(leaflet)
library("rnaturalearth")
library("rnaturalearthdata")
library("sf")
library("rgeos")

### 
library(tidyverse)
library(ggplot2)
library(readr)
library(maps)
library(viridis)
world <- ne_countries(scale = "medium", returnclass = "sf")
world <- data.frame(world)
world <- world %>% rename(location = name)
head(world)
  scalerank      featurecla labelrank  sovereignt sov_a3 adm0_dif level    type
0         3 Admin-0 country         5 Netherlands    NL1        1     2 Country
  admin adm0_a3 geou_dif geounit gu_a3 su_dif subunit su_a3 brk_diff location
0 Aruba     ABW        0   Aruba   ABW      0   Aruba   ABW        0    Aruba
  name_long brk_a3 brk_name brk_group abbrev postal formal_en formal_fr
0     Aruba    ABW    Aruba      <NA>  Aruba     AW     Aruba      <NA>
  note_adm0 note_brk name_sort name_alt mapcolor7 mapcolor8 mapcolor9
0     Neth.     <NA>     Aruba     <NA>         4         2         2
  mapcolor13 pop_est gdp_md_est pop_year lastcensus gdp_year
0          9  103065       2258       NA       2010       NA
               economy              income_grp wikipedia fips_10 iso_a2 iso_a3
0 6. Developing region 2. High income: nonOECD        NA    <NA>     AW    ABW
  iso_n3 un_a3 wb_a2 wb_a3 woe_id adm0_a3_is adm0_a3_us adm0_a3_un adm0_a3_wb
0    533   533    AW   ABW     NA        ABW        ABW         NA         NA
      continent region_un subregion                 region_wb name_len long_len
0 North America  Americas Caribbean Latin America & Caribbean        5        5
  abbrev_len tiny homepart                       geometry
0          5    4       NA MULTIPOLYGON (((-69.89912 1...
 [ reached 'max' / getOption("max.print") -- omitted 5 rows ]
head(data_agg)
      continent  location gdp_per_capita population_density total_cases
1          Asia  Mongolia      11840.846              1.980        1517
2        Africa   Namibia       9541.808              3.078       30198
3       Oceania Australia      44648.710              3.202       28708
4        Europe   Iceland      46482.958              3.404          NA
5 South America  Suriname      13767.119              3.612        7469
6        Africa     Libya      17881.509              3.623          NA
  population
1    3278292
2    2540916
3   25499881
4     341250
5     586634
6    6871287
# unique(world$name) unique(data_sel$location)
# Default theme:
theme_set(theme_bw())

world <- merge(x = world, y = data_agg, by = "location", all.x = TRUE)
head(world)
     location scalerank      featurecla labelrank  sovereignt sov_a3 adm0_dif
1 Afghanistan         1 Admin-0 country         3 Afghanistan    AFG        0
  level              type       admin adm0_a3 geou_dif     geounit gu_a3 su_dif
1     2 Sovereign country Afghanistan     AFG        0 Afghanistan   AFG      0
      subunit su_a3 brk_diff   name_long brk_a3    brk_name brk_group abbrev
1 Afghanistan   AFG        0 Afghanistan    AFG Afghanistan      <NA>   Afg.
  postal                    formal_en formal_fr note_adm0 note_brk   name_sort
1     AF Islamic State of Afghanistan      <NA>      <NA>     <NA> Afghanistan
  name_alt mapcolor7 mapcolor8 mapcolor9 mapcolor13  pop_est gdp_md_est
1     <NA>         5         6         8          7 28400000      22270
  pop_year lastcensus gdp_year                   economy    income_grp
1       NA       1979       NA 7. Least developed region 5. Low income
  wikipedia fips_10 iso_a2 iso_a3 iso_n3 un_a3 wb_a2 wb_a3 woe_id adm0_a3_is
1        NA    <NA>     AF    AFG    004   004    AF   AFG     NA        AFG
  adm0_a3_us adm0_a3_un adm0_a3_wb continent.x region_un     subregion
1        AFG         NA         NA        Asia      Asia Southern Asia
   region_wb name_len long_len abbrev_len tiny homepart
1 South Asia       11       11          4   NA        1
                        geometry continent.y gdp_per_capita population_density
1 MULTIPOLYGON (((74.89131 37...        Asia       1803.987             54.422
  total_cases population
1       53938   38928341
 [ reached 'max' / getOption("max.print") -- omitted 5 rows ]
# ggplot(data = world) + geom_sf(aes(fill = income_grp )) + coord_sf(expand =
# FALSE) + # geom_sf(aes(fill = pop_est)) + scale_fill_brewer(palette = 'YlOrBr')
# + # scale_fill_viridis_c(option = 'plasma', trans = 'sqrt', guide =
# guide_colorbar(barwidth = 30), name = NULL) + theme(legend.position = 'right')
# # theme(legend.position = 'bottom')
datacov <- read_csv("time_series_covid19_confirmed_global.csv")

head(datacov)
# A tibble: 6 x 368
  `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
  <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
1 <NA>             Afghanistan       33.9  67.7          0         0         0
2 <NA>             Albania           41.2  20.2          0         0         0
3 <NA>             Algeria           28.0   1.66         0         0         0
4 <NA>             Andorra           42.5   1.52         0         0         0
5 <NA>             Angola           -11.2  17.9          0         0         0
6 <NA>             Antigua and Bar~  17.1 -61.8          0         0         0
# ... with 361 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>,
#   `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
#   `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
#   `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
#   `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
#   `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
#   `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
#   `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
#   `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
#   `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
#   `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
#   `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
#   `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
#   `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
#   `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
#   `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
#   `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
#   `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
#   `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
#   `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
#   `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
#   `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
#   `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
#   `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
#   `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>,
#   `5/2/20` <dbl>, `5/3/20` <dbl>, ...
world <- map_data("world")

# cutoffs based on the number of cases
mybreaks <- c(1, 20, 100, 1000, 50000)

ggplot() + geom_polygon(data = world, aes(x = long, y = lat, group = group), fill = "grey", 
    alpha = 0.3) + geom_point(data = datacov, aes(x = Long, y = Lat, size = `3/3/20`, 
    color = `3/3/20`), stroke = F, alpha = 0.7) + scale_size_continuous(name = "Cases", 
    trans = "log", range = c(1, 7), breaks = mybreaks, labels = c("1-19", "20-99", 
        "100-999", "1,000-49,999", "50,000+")) + # scale_alpha_continuous(name='Cases', trans='log', range=c(0.1,
# 0.9),breaks=mybreaks) +
scale_color_viridis_c(option = "inferno", name = "Cases", trans = "log", breaks = mybreaks, 
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) + theme_void() + 
    guides(colour = guide_legend()) + labs(caption = "Data Repository provided by Johns Hopkins CSSE. Visualization by DataScience+ ") + 
    theme(legend.position = "bottom", text = element_text(color = "#22211d"), plot.background = element_rect(fill = "#ffffff", 
        color = NA), panel.background = element_rect(fill = "#ffffff", color = NA), 
        legend.background = element_rect(fill = "#ffffff", color = NA))

# remotes::install_github('GuangchuangYu/nCov2019') source:
# https://github.com/GuangchuangYu/shadowtext/blob/master/R/geom-shadowtext.R
# source: https://github.com/GuangchuangYu/nCov2019

library("remotes")
# remotes::install_github('GuangchuangYu/nCov2019', dependencies = TRUE)
library("nCov2019")

library(ggplot2)
y <- load_nCov2019(lang = "en")
d <- y["global"]
max_time <- max(d$time)
min_time <- max_time - 7
d <- na.omit(d[d$time >= min_time & d$time <= max_time, ])
dd <- d[d$time == max(d$time, na.rm = TRUE), ]
d$country <- factor(d$country, levels = unique(dd$country[order(dd$cum_confirm)]))
breaks = c(10, 100, 1000, 10000)
ggplot(d, aes(time, country)) + geom_tile(aes(fill = cum_confirm), color = "black") + 
    scale_fill_viridis_c(trans = "log", breaks = breaks, labels = breaks) + xlab(NULL) + 
    ylab(NULL) + scale_x_date(date_labels = "%Y-%m-%d") + theme_minimal()

# require(nCov2019) y <- load_nCov2019(lang = 'en', source='github') d =
# y['global'] require(dplyr) dd <- filter(d, time == time(y)) %>%
# arrange(desc(cum_confirm)) dd = dd[1:40, ] dd$country = factor(dd$country,
# levels=dd$country) dd$angle = 1:40 * 360/40 require(ggplot2) p <- ggplot(dd,
# aes(country, cum_confirm, fill=cum_confirm)) + geom_col(width=1,
# color='grey90') + geom_col(aes(y=I(5)), width=1, fill='grey90', alpha = .2) +
# geom_col(aes(y=I(3)), width=1, fill='grey90', alpha = .2) +
# geom_col(aes(y=I(2)), width=1, fill = 'white') + scale_y_log10() +
# scale_fill_gradientn(colors=c('darkgreen', 'green', 'orange',
# 'firebrick','red'), trans='log') + geom_text(aes(label=paste(country,
# cum_confirm, sep='\n'), y = cum_confirm *.8, angle=angle), data=function(d)
# d[d$cum_confirm > 700,], size=3, color = 'white', fontface='bold', vjust=1) +
# geom_text(aes(label=paste0(cum_confirm, ' cases ', country), y =
# max(cum_confirm) * 2, angle=angle+90), data=function(d) d[d$cum_confirm <
# 700,], size=3, vjust=0) + coord_polar(direction=-1) + theme_void() +
# theme(legend.position='none') + ggtitle('COVID19 global trend', time(y))
# print(p)
# library('maps') x = world plot(x)

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.